% analyse_TGM2025_data

% Analyses for Thompson, Grady, Morrison (2025) Does explaining the meaning of likelihood ratios improve lay understanding?

% tested on Matlab 2025a
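% requires statistics_toolbox
% also requires a violin() plotting function on the Matlab path (called below for Figs 1 and 2; it is not shipped with Matlab)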

% initialize
close all
clear all
clc


% load data
% The data file 'TGM2025_data.xlsx' should be placed in the same folder as this script.

data_filename = 'TGM2025_data.xlsx';

% Import options are detected once; only the selected columns change between reads.
opts = detectImportOptions(data_filename);

% Condition indicators, one row per participant.
% Columns: 1 = high prior, 2 = LR of 3000, 3 = explanation given.
% Later sections compare each column against 0 and 1.
opts.SelectedVariableNames = ["Condition_high_prior", "Condition_LR_3000", "Condition_explanation"];
conditions_table = readtable(data_filename, opts);
II_conditions = conditions_table{:,:};

% Elicited odds, read as four columns: prior H1, prior H2, posterior H1, posterior H2.
opts.SelectedVariableNames = ["Prior_odds_H1", "Prior_odds_H2", "Posterior_odds_H1", "Posterior_odds_H2"];
odds_table = readtable(data_filename, opts);
log10odds_raw = log10(odds_table{:,:});

% Any NaN log value is set to 0 (i.e., treated as log10(1)).
% NOTE(review): presumably this stands in for blank spreadsheet cells - confirm that
% treating a missing value as 1 is intended, since it makes such rows look like odds of 1.
is_missing = isnan(log10odds_raw);
log10odds_raw(is_missing) = 0;

% log10 odds = log10(H1) - log10(H2); column 1 = prior, column 2 = posterior.
log10odds = log10odds_raw(:,[1 3]) - log10odds_raw(:,[2 4]);

% Testimony-quality items, averaged across the five items for each participant.
opts.SelectedVariableNames = ["Expert_biased_reversed", "Expert_qualified", "Expert_credible", "Expert_methods", "Expert_trustworthy"];
quality_table = readtable(data_filename, opts);
testimony_quality = quality_table{:,:};
testimony_quality_mean = mean(testimony_quality, 2);


% Fig 1 - violin plot for prior odds

% One cell per prior condition: {low prior (flag == 0), high prior (flag == 1)}.
log10prior_odds_cells = {log10odds(II_conditions(:,1) == 0, 1), ...
                         log10odds(II_conditions(:,1) == 1, 1)};

figure(1);
fprintf('\n\n')
bw = 0.2; % bandwidth handed to violin() via its 'bw' option
violin(log10prior_odds_cells, 'bw', bw, 'xlabel',{'low prior','high prior'}, 'facecolor',[0.9 0.9 0.9], 'mc',[], 'plotlegend',false, 'same_area',true);

ax = gca;

box(ax, 'on')
grid(ax, 'on')
ylabel(ax, 'log_{10}(prior odds)');

% Font sizes are kept in variables because the Fig 2 section reuses them.
label_font_size = 14;
tick_font_size = 12;

% The y-label size is set after the y-axis size, as in the original ordering.
set(ax.XAxis, 'FontSize', label_font_size);
set(ax.YAxis, 'FontSize', tick_font_size);
set(ax.YLabel, 'FontSize', label_font_size);
set(ax, 'LineWidth', 1);

% Fixed y range with a tick at every integer.
ax.YLim = [-6 6];
ax.YTick = -6:6;


% Fig 2 - violin plot for effective likelihood ratios + organize data for Tables
% In the published paper, the violin plots have been reordered relative to the order plotted here.

% Effective likelihood ratio implied by each participant's responses:
% log10(posterior odds) - log10(prior odds).
log10LR_effective = log10odds(:,2) - log10odds(:,1);

% Per-cell results for the 2 x 2 x 2 design (8 cells).
% Cell order follows the loop nesting below: explanation (outer), LR value, prior condition (inner), i.e.
%   cells 1:4 = no explanation, cells 5:8 = explanation;
%   within each group of four: [LR=30 low prior, LR=30 high prior, LR=3000 low prior, LR=3000 high prior].
% For the 2-row count matrices:
%   row 1 = all participants in the cell;
%   row 2 = after removing participants whose prior odds = 1 AND whose effective LR is "orthodox".
log10LR_effective_cells = cell(1,8);
count_participants = NaN(2,8);
count_orthodox = NaN(2,8);
count_post_equals_LR = NaN(2,8);
testimony_quality_mean_cells = cell(1,8);

% Use the row count explicitly: length() returns the largest dimension, which is
% only the number of participants when there are more rows than columns.
II_conditions_TF = false(size(II_conditions,1),8);

% An effective LR counts as "orthodox" if it lies within +/-10% of the presented LR
% (row 1: LR = 30, row 2: LR = 3000; columns: lower and upper bound in log10).
log10LR_orthodox_range = [log10(30*0.9), log10(30*1.1); ...
                          log10(3000*0.9), log10(3000*1.1)];
log10_presented = [log10(30), log10(3000)];

% Row 1 = median signed difference, row 2 = median unsigned distance.
median_distance_to_presented_log10LR = NaN(2,8);

I_cell = 0;
for I_explanation = 0:1
    II_explanation = II_conditions(:,3) == I_explanation;
    for I_LR_value = 0:1
        II_LR_value = II_conditions(:,2) == I_LR_value;
        for I_prior_condition = 0:1
            I_cell = I_cell+1;

            % participants belonging to this cell
            II_prior_condition = II_conditions(:,1) == I_prior_condition;
            II_temp = II_explanation & II_LR_value & II_prior_condition;
            II_conditions_TF(:,I_cell) = II_temp;

            % --- all participants in the cell ---

            % num participants
            num_II_temp = sum(II_temp);
            count_participants(1,I_cell) = num_II_temp;

            % effective LR
            log10LR_effective_temp = log10LR_effective(II_temp);
            log10LR_effective_cells{I_cell} = log10LR_effective_temp;

            % median signed difference from, and unsigned distance to, presented log10LR
            signed_difference_from_presented_log10LR = log10LR_effective_temp - log10_presented(I_LR_value+1);
            unsigned_distance_to_presented_log10LR = abs(signed_difference_from_presented_log10LR);
            median_distance_to_presented_log10LR(:,I_cell) = [median(signed_difference_from_presented_log10LR); median(unsigned_distance_to_presented_log10LR)];

            % num participants for whom effective LR = presented LR (within the orthodox range)
            II_orthodox = log10LR_effective_temp >= log10LR_orthodox_range(I_LR_value+1,1) & log10LR_effective_temp <= log10LR_orthodox_range(I_LR_value+1,2);
            count_orthodox(1,I_cell) = sum(II_orthodox);

            % posterior odds
            log10_posterior_odds_temp = log10odds(II_temp,2);

            % --- excluding participants whose prior odds = 1 and whose effective LR is orthodox ---

            % participants to exclude (log10 prior odds of exactly 0 means prior odds = 1)
            II_prior_one = log10odds(II_temp,1) == 0;
            II_excluded = II_prior_one & II_orthodox;
            num_prior_one_and_orthodox = sum(II_excluded);

            % num participants
            count_participants(2,I_cell) = num_II_temp - num_prior_one_and_orthodox;

            % num participants for whom effective LR = presented LR
            % (every excluded participant is orthodox, so a plain subtraction is exact here)
            count_orthodox(2,I_cell) = count_orthodox(1,I_cell) - num_prior_one_and_orthodox;

            % participants for whom posterior odds = presented LR, including (row 1) or excluding (row 2) the excluded set.
            % NOTE(review): this is an exact floating-point comparison of a difference of logs, so
            % posterior odds such as 300:10 may not compare equal to log10(30) - confirm this is acceptable.
            II_post_equals_LR = log10_posterior_odds_temp == log10_presented(I_LR_value+1);
            count_post_equals_LR(1,I_cell) = sum(II_post_equals_LR);
            % Count the remaining participants directly. Subtracting num_prior_one_and_orthodox would
            % also remove excluded participants whose posterior is within +/-10% of, but not equal to,
            % the presented LR; they were never in the numerator, so the count would come out too low.
            count_post_equals_LR(2,I_cell) = sum(II_post_equals_LR & ~II_excluded);

            % testimony quality (organizing for later use)
            testimony_quality_mean_cells{I_cell} = testimony_quality_mean(II_temp);
        end
    end
end

% Violin plot of effective log10 LRs, one violin per design cell (8 in total).
figure(2);
fprintf('\n\n')
bw = 0.2; % bandwidth handed to violin() via its 'bw' option
% The returned values are not used further in this section.
[h,L,MX,MED,bw,width] = violin(log10LR_effective_cells, 'bw', bw, 'facecolor',[0.9 0.9 0.9], 'mc',[], 'plotlegend',false, 'same_area',true);

ylim([-6 6])
yticks(-6:6)

box on
grid on
ylabel('effective log_{10}(\Lambda)');

% label_font_size and tick_font_size are defined in the Fig 1 section.
ax = gca;
ax.PlotBoxAspectRatio = [2 1 1];
ax.XAxis.FontSize = label_font_size;
ax.YAxis.FontSize = tick_font_size;
ax.YLabel.FontSize = label_font_size;
ax.LineWidth = 1;

% Make sure the reference lines are added to the violins rather than replacing them;
% without this the result depends on the hold state that violin() happens to leave behind.
hold on

% Horizontal reference lines at the presented LR for each pair of violins.
% Each row: [x start, x end, presented LR]; drawn in the same order as before.
reference_lines = [0.5 2.5   30; ...
                   4.5 6.5   30; ...
                   2.5 4.5 3000; ...
                   6.5 8.5 3000];
for I_line = 1:size(reference_lines,1)
    presented_LR = reference_lines(I_line,3);
    plot(reference_lines(I_line,1:2), log10([presented_LR presented_LR]), '-k', 'LineWidth',1);
end


% Tables

% Print the tables to the command window.
% Column layout of the per-cell matrices (set by the loop order in the Fig 2 section):
%   columns 1:4 = no explanation ("n" rows), columns 5:8 = explanation ("y" rows);
%   within each group of four: [LR=30 low prior, LR=30 high prior, LR=3k low prior, LR=3k high prior].
% Row layout of the count matrices: row 1 = including, row 2 = excluding, the participants
% with prior odds = 1 who were removed in the Fig 2 section.
% Totals/pooled values: column 1 = no explanation, column 2 = explanation.

sum_participants = [sum(count_participants(:,1:4),2), sum(count_participants(:,5:8),2)];

fprintf('\nTable 1 - number of participants')
fprintf('\nExpln\tLR=30\t\tLR=3k\t\ttotal')
fprintf('\n\tlow\thigh\tlow\thigh')
fprintf('\ny\t%0.0f\t%0.0f\t%0.0f\t%0.0f\t%0.0f', [count_participants(1,5:8), sum_participants(1,2)])
fprintf('\nn\t%0.0f\t%0.0f\t%0.0f\t%0.0f\t%0.0f', [count_participants(1,1:4), sum_participants(1,1)])
fprintf('\n')

% Percent of participants whose effective LR falls in the orthodox range,
% per cell and pooled over the four cells of each explanation condition.
pcnt_orthodox = (count_orthodox ./ count_participants) * 100;
sum_orthodox = [sum(count_orthodox(:,1:4),2), sum(count_orthodox(:,5:8),2)];
pcnt_sum_orthodox = (sum_orthodox ./ sum_participants) * 100;

fprintf('\nTable 2a - percent effective LR = presented LR, including prior odds = 1')
fprintf('\nExpln\tLR=30\t\tLR=3k\t\tboth')
fprintf('\n\tlow\thigh\tlow\thigh\tboth')
fprintf('\ny\t%0.1f\t%0.1f\t%0.1f\t%0.1f\t%0.1f', [pcnt_orthodox(1,5:8), pcnt_sum_orthodox(1,2)])
fprintf('\nn\t%0.1f\t%0.1f\t%0.1f\t%0.1f\t%0.1f', [pcnt_orthodox(1,1:4), pcnt_sum_orthodox(1,1)])
fprintf('\n')

fprintf('\nTable 2b - percent effective LR = presented LR, excluding prior odds = 1')
fprintf('\nExpln\tLR=30\t\tLR=3k\t\tboth')
fprintf('\n\tlow\thigh\tlow\thigh\tboth')
fprintf('\ny\t%0.1f\t%0.1f\t%0.1f\t%0.1f\t%0.1f', [pcnt_orthodox(2,5:8), pcnt_sum_orthodox(2,2)])
fprintf('\nn\t%0.1f\t%0.1f\t%0.1f\t%0.1f\t%0.1f', [pcnt_orthodox(2,1:4), pcnt_sum_orthodox(2,1)])
fprintf('\n')

% median_distance_to_presented_log10LR: row 1 = signed difference, row 2 = unsigned distance.
fprintf('\nTable 3a - median signed difference between log10 effective LRs and log10 of presented LR')
fprintf('\nExpln\tLR=30\t\tLR=3k')
fprintf('\n\tlow\thigh\tlow\thigh')
fprintf('\ny\t%0.2f\t%0.2f\t%0.2f\t%0.2f', median_distance_to_presented_log10LR(1,5:8))
fprintf('\nn\t%0.2f\t%0.2f\t%0.2f\t%0.2f', median_distance_to_presented_log10LR(1,1:4))
fprintf('\n')

% This heading previously repeated "Table 3a"; it is the second (unsigned) part of Table 3.
fprintf('\nTable 3b - median unsigned distance between log10 effective LRs and log10 of presented LR')
fprintf('\nExpln\tLR=30\t\tLR=3k')
fprintf('\n\tlow\thigh\tlow\thigh')
fprintf('\ny\t%0.2f\t%0.2f\t%0.2f\t%0.2f', median_distance_to_presented_log10LR(2,5:8))
fprintf('\nn\t%0.2f\t%0.2f\t%0.2f\t%0.2f', median_distance_to_presented_log10LR(2,1:4))
fprintf('\n')

% Percent of participants whose posterior odds equal the presented LR,
% per cell and pooled over the four cells of each explanation condition.
pcnt_count_post_equals_LR = (count_post_equals_LR ./ count_participants) * 100;
sum_post_equals_LR = [sum(count_post_equals_LR(:,1:4),2), sum(count_post_equals_LR(:,5:8),2)];
pcnt_sum_post_equals_LR = (sum_post_equals_LR ./ sum_participants) * 100;

fprintf('\nTable 4a - posterior = presented LR, including prior odds = 1')
fprintf('\nExpln\tLR=30\t\tLR=3k\t\tboth')
fprintf('\n\tlow\thigh\tlow\thigh\tboth')
fprintf('\ny\t%0.1f\t%0.1f\t%0.1f\t%0.1f\t%0.1f', [pcnt_count_post_equals_LR(1,5:8), pcnt_sum_post_equals_LR(1,2)])
fprintf('\nn\t%0.1f\t%0.1f\t%0.1f\t%0.1f\t%0.1f', [pcnt_count_post_equals_LR(1,1:4), pcnt_sum_post_equals_LR(1,1)])
fprintf('\n')

fprintf('\nTable 4b - posterior = presented LR, excluding prior odds = 1')
fprintf('\nExpln\tLR=30\t\tLR=3k\t\tboth')
fprintf('\n\tlow\thigh\tlow\thigh\tboth')
fprintf('\ny\t%0.1f\t%0.1f\t%0.1f\t%0.1f\t%0.1f', [pcnt_count_post_equals_LR(2,5:8), pcnt_sum_post_equals_LR(2,2)])
fprintf('\nn\t%0.1f\t%0.1f\t%0.1f\t%0.1f\t%0.1f', [pcnt_count_post_equals_LR(2,1:4), pcnt_sum_post_equals_LR(2,1)])
fprintf('\n')


% Fig 3 - plot effective logLR against judgement of testimony quality

% One new figure per design cell: mean testimony-quality rating (x) against effective log10 LR (y).
for I_cell = 1:8
    figure;
    % II_prior_odds_1 = log10odds(II_conditions_TF(:,I_cell),1)==0; % use this to exclude prior odds = 1

    quality_this_cell = testimony_quality_mean_cells{I_cell};
    log10LR_this_cell = log10LR_effective_cells{I_cell};

    % Large, filled, semi-transparent blue markers so that overlapping points show up darker.
    marker_handle = scatter(quality_this_cell, log10LR_this_cell, 200, 'b', 'filled');
    set(marker_handle, 'MarkerFaceAlpha', 0.2);

    % Same axis limits in every panel.
    ylim([-6 6])
    xlim([1 5])
    axis square

    title([])
    xlabel('Perceived quality of testimony')
    ylabel('effective log$_{10}(\Lambda)$', 'Interpreter','latex');
    % xlabel([])
    % ylabel([])

    box on
    grid on
end
